//check naics of a certain length

/*
This program tests a NAICS variable of a given length against the list of valid NAICS values for a given year.
If a NAICS value is invalid, it is replaced with missing.
It takes three arguments that must be given in order and are mandatory:
length (a number 2-6 which is the length of the NAICS variable being tested)
naics_var (the name of the naics variable in the dataset)
year_var (the name of the year variable in the dataset)
*/

// Positional arguments: the NAICS digit length, the name of the NAICS
// variable, and the name of the year variable.
args length naics_var year_var

// The merges below are keyed on naics`length', so the caller's variable
// temporarily takes that name.
local key naics`length'
rename `naics_var' `key'

// Look the code up in the valid-NAICS list of every vintage (2002, 2007,
// 2012, 2017, 2022). Rows found only in the lookup file are discarded, and
// valid_naics_<vintage> records the merge result for that vintage
// (1 = present in the master data only, 3 = matched).
forvalues naics_year = 2002(5)2022 {
	merge m:1 `key' using "/cleaning/valid_naics/naics`length'_`naics_year'", ///
		keep(master matched) keepusing(`key') gen(valid_naics_`naics_year')
}

// Give the variable back its original name.
rename `key' `naics_var'

// Set the NAICS code to missing when it failed to match the valid list of the
// vintage that applies to the observation's year. valid_naics_<vintage> == 1
// marks a row that had no match in that vintage's list.
//
// Years before 2002 are checked against the 2002 list, the earliest one merged.
// Stata orders missing values above every number, so an unguarded
// "year >= 2022" would also be true for a missing year; the last rule excludes
// missing years explicitly, and such rows are left unchanged because no
// vintage can be chosen for them.
//
// NOTE(review): "= ." assumes the NAICS variable is numeric; confirm against
// the callers, since a string variable would raise a type mismatch here.

replace `naics_var' = . if valid_naics_2002 == 1 & `year_var' <= 2006
replace `naics_var' = . if valid_naics_2007 == 1 & inrange(`year_var',2007,2011)
replace `naics_var' = . if valid_naics_2012 == 1 & inrange(`year_var',2012,2016)
replace `naics_var' = . if valid_naics_2017 == 1 & inrange(`year_var',2017,2021)
replace `naics_var' = . if valid_naics_2022 == 1 & `year_var' >= 2022 & !missing(`year_var')

// Remove the merge-result flags now that the rules have been applied.
drop valid_naics*
